Overall

Column

Fish Production by Lake

Fish Production by Species

Column

Fish Production by Lake and Species

Most Common Type of Fish Species in Each Lake

Lakes that the 5 most produced species are found in

Peak Years of Production

Column

Peak Year of Production For Each Species

Column

Peak Year of Production For Each Lake

---
title: "Great Lakes Fish"
author: "Antony Rono"
date: 2021-06-08
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    source_code: embed
    hrbrthemes::FT:
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
# Do not echo chunk source code next to chunk output.
knitr::opts_chunk$set(echo = FALSE)

# Packages used by the chunks below.
library(flexdashboard)
library(tidyverse)
library(tidytuesdayR)
library(scales)
library(plotly)
library(tidytext)

# Large scipen penalty so numbers print in fixed rather than scientific notation.
options(scipen = 99)
# Make theme_light() the default ggplot2 theme for every plot in the document.
theme_set(theme_light())
```

```{r Load, include=FALSE}

# Alternative loader via tidytuesdayR, kept for reference:
# tt <- tt_load("2021-06-08")

# Read both TidyTuesday CSVs directly from GitHub.
fishing <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-08/fishing.csv"
)
stocked <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-08/stocked.csv"
)


```

```{r Cleaning data, include=FALSE}

# Build the country-level data set used by every chart:
#   * collapse region labels that contain "U.S. Total" / "Canada (ONT)"
#     (e.g. "U.S. Total (MI)", "Total Canada (ONT)") onto the two canonical
#     country labels,
#   * scale `values` by 1000 and round to whole numbers
#     (NOTE(review): presumably the source reports thousands -- confirm
#     against the data dictionary),
#   * title-case species names and strip a single trailing "s" (but not "ss")
#     so plural/singular spellings are pooled,
#   * keep only the two country-level regions with non-missing,
#     non-negative values.
fishing_countries <- fishing %>%
  mutate(
    region = case_when(
      # fixed() is required: as a regex, "(ONT)" is a capture group and the
      # pattern would match "Canada ONT", never the literal "Canada (ONT)".
      str_detect(region, fixed("U.S. Total")) ~ "U.S. Total",
      str_detect(region, fixed("Canada (ONT)")) ~ "Canada (ONT)",
      TRUE ~ region
    ),
    values = round(values * 1000, 0),
    species = str_to_title(species),
    species = str_replace(species, "([^s])s$", "\\1")
  ) %>%
  filter(
    region %in% c("Canada (ONT)", "U.S. Total"),
    values >= 0,
    !is.na(values)
  )


```

# Introduction {.sidebar}


This report examines fish production in the Great Lakes for the period 1867-2015.

The report is part of my submission to the [#TidyTuesday challenge](https://github.com/rfordatascience/tidytuesday), a weekly data project on understanding how to analyze data to make meaningful and/or beautiful charts using R.

It is written in [R Markdown](http://rmarkdown.rstudio.com), an authoring format that enables easy creation of dynamic documents, presentations, and reports from R. R Markdown combines the core syntax of **markdown** (an easy to write plain text format) with embedded **R** code chunks that are run so their output can be included in the final document.

The data comes from the [Great Lakes Fishery Commission](http://www.glfc.org/great-lakes-databases.php).

Full details on the data can be found in their [statistical notes](http://www.glfc.org/commercial/COMMERCIAL%20FISH%20PRODUCTION_Notes%20on%20Statistics.pdf) and [background notes](http://www.glfc.org/commercial/COMMERCIAL%20FISH%20PRODUCTION_background.pdf).


Overall {data-icon="fa-map-marker-alt"}
=======================================================================

Column
-----------------------------------------------------------------------

### Fish Production by Lake

```{r Fish production by lake}

# All-time production per lake, lakes ordered from largest to smallest total.
lake_totals <- fishing_countries %>%
  count(lake, wt = values, name = "production") %>%
  mutate(lake = fct_reorder(lake, production, .desc = TRUE))

# One bar per lake; the fill legend is hidden because it repeats the x axis.
# NOTE(review): the "£" prefix reads as currency -- confirm the unit of
# `values` before relying on these axis labels.
p <- ggplot(lake_totals, aes(x = lake, y = production, fill = lake)) +
  geom_col() +
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  ) +
  theme(legend.position = "none")

ggplotly(p, tooltip = c("x", "y"))

```

### Fish Production by Species

```{r Fish production by Species}

# All-time production per species. Species outside the 15 largest (weighted
# by production) are lumped together, then totals are recomputed so the
# lumped level is a single bar. Bars are ordered by total production.
species_totals <- fishing_countries %>%
  count(species, wt = values, name = "production", sort = TRUE) %>%
  mutate(species = fct_lump(species, 15, w = production)) %>%
  count(species, wt = production, name = "production", sort = TRUE) %>%
  mutate(species = fct_reorder(species, production, .desc = FALSE))

# Horizontal bars (coord_flip) so species names stay readable.
p <- ggplot(species_totals, aes(x = species, y = production, fill = species)) +
  geom_col() +
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  ) +
  theme(legend.position = "none") +
  coord_flip()

ggplotly(p, tooltip = c("x", "y"))


```

Column {.tabset}
-----------------------------------------------------------------------

### Fish Production by Lake and Species

```{r production by lake and species heatmap}

# Heatmap of each species' share of a lake's all-time production.
# Species outside the 20 largest (weighted by values) are lumped together;
# species and lakes are ordered by their summed production.
p <- fishing_countries %>%
  mutate(
    species = fct_lump(species, 20, w = values),
    species = fct_reorder(species, values, sum, .desc = FALSE),
    lake = fct_reorder(lake, values, sum, .desc = TRUE)
  ) %>%
  count(lake, species, wt = values, name = "production") %>%
  # Share is computed within each lake, so every lake's tiles sum to 100%.
  group_by(lake) %>%
  mutate(pct = production / sum(production)) %>%
  ungroup() %>%
  ggplot(aes(lake, species, fill = pct)) +
  geom_tile() +
  scale_fill_gradient(
    low = "white",
    high = "#e6550d",
    guide = "colorbar",
    labels = percent_format(accuracy = 1)
  ) +
  # Anchor the colour scale at 0% so white always means "no production".
  expand_limits(fill = 0) +
  theme(panel.grid = element_blank()) +
  # The fill aesthetic is `pct`, so the legend must describe a percentage.
  # The original passed `fill =` twice with conflicting titles.
  labs(
    x = "Lake",
    y = "Species",
    fill = "% of lake's production"
  )

ggplotly(p)
```


### Most Common Type of Fish Species in Each Lake

```{r most common type of fish for each lake}

# For every lake keep its five highest-producing species (slice_max keeps
# ties), and prefix the lake name with "Lake" for the facet titles.
top_species_by_lake <- fishing_countries %>%
  count(lake, species, wt = values, name = "production", sort = TRUE) %>%
  group_by(lake) %>%
  slice_max(order_by = production, n = 5) %>%
  ungroup() %>%
  mutate(lake = paste("Lake", lake))

# One facet per lake. Species are ordered within each facet by production;
# the y axis text is hidden and each point is labelled directly instead.
p <- ggplot(
  top_species_by_lake,
  aes(
    x = production,
    y = tidytext::reorder_within(species, production, lake)
  )
) +
  geom_point(aes(size = production, color = species)) +
  geom_text(aes(label = species), size = 3, nudge_y = .3) +
  scale_x_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£"),
    expand = c(1, 0)
  ) +
  facet_wrap(~lake, scales = "free") +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 7),
    strip.background = element_rect(fill = "white"),
    panel.border = element_rect(colour = "black", fill = NA),
    strip.text = element_text(colour = "black", size = 12, face = "bold"),
    legend.position = "none"
  )

ggplotly(p, tooltip = c("x"))
```

### Lakes that the 5 most produced species are found in

```{r Lakes that the 5 most produces species are found in}

# Names of the five species with the largest all-time production
# (slice_max keeps ties).
top_five_species <- fishing_countries %>%
  count(species, wt = values) %>%
  slice_max(n, n = 5) %>%
  pull(species)

# Production per lake for those species. `total` is each species' overall
# production and is used only to order the facets from largest to smallest.
top_species_lakes <- fishing_countries %>%
  count(lake, species, wt = values, name = "production", sort = TRUE) %>%
  filter(species %in% top_five_species) %>%
  group_by(species) %>%
  mutate(total = sum(production)) %>%
  ungroup() %>%
  mutate(species = fct_reorder(species, total, .desc = TRUE))

# One facet per species. Lakes are ordered within each facet by production;
# the y axis text is hidden and each point is labelled directly instead.
p <- ggplot(
  top_species_lakes,
  aes(
    x = production,
    y = tidytext::reorder_within(lake, production, species)
  )
) +
  geom_point(aes(size = production, color = lake)) +
  geom_text(aes(label = lake), size = 3, nudge_y = .3) +
  scale_x_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£"),
    expand = c(1, 0)
  ) +
  facet_wrap(~species, scales = "free") +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 7),
    strip.background = element_rect(fill = "white"),
    panel.border = element_rect(colour = "black", fill = NA),
    strip.text = element_text(colour = "black", size = 12, face = "bold"),
    legend.position = "none"
  )

ggplotly(p, tooltip = c("x"))

```


Yearly Trends {data-orientation=rows data-icon="fa-chart-line"}
=======================================================================

Row
-----------------------------------------------------------------------

### Trend In Fish Production by Country

```{r trend in production amount by country}

# Yearly production per country-level region.
yearly_by_region <- fishing_countries %>%
  count(year, region, wt = values, name = "production") %>%
  mutate(production = round(production, 0)) %>%
  filter(!is.na(production))

# Stacked area chart, one band per region.
# NOTE(review): the "£" prefix reads as currency -- confirm the unit of
# `values` before relying on these axis labels.
p <- ggplot(yearly_by_region, aes(x = year, y = production, fill = region)) +
  geom_area() +
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  )

ggplotly(p, tooltip = c("fill", "x", "y"))

```

Row
-----------------------------------------------------------------------

### Trend In Fish Production by Lake

```{r trend in production amount by lake}

# Yearly production per lake. Lakes are ordered by fct_reorder()'s default
# summary (median) of their yearly production, which sets the stacking order.
yearly_by_lake <- fishing_countries %>%
  count(year, lake, wt = values, name = "production") %>%
  mutate(production = round(production, 0)) %>%
  mutate(lake = fct_reorder(lake, production, .desc = FALSE)) %>%
  filter(!is.na(production))

# Stacked area chart, one band per lake.
p <- ggplot(yearly_by_lake, aes(x = year, y = production, fill = lake)) +
  geom_area() +
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  )

ggplotly(p, tooltip = c("fill", "x", "y"))


```
Row
-----------------------------------------------------------------------

### Trend In Fish Production for Top 5 Species

```{r trend in production amount by top 5 species}

# Names of the five species with the largest all-time production
# (slice_max keeps ties).
top_five_species <- fishing_countries %>%
  count(species, wt = values) %>%
  slice_max(n, n = 5) %>%
  pull(species)

# Yearly production for those species. `total` is each species' overall
# production and is used only to fix the stacking order of the bands.
yearly_top_species <- fishing_countries %>%
  count(year, species, wt = values, name = "production", sort = TRUE) %>%
  filter(species %in% top_five_species) %>%
  group_by(species) %>%
  mutate(total = sum(production)) %>%
  ungroup() %>%
  mutate(species = fct_reorder(species, total))

# Stacked area chart, one band per species.
p <- ggplot(yearly_top_species, aes(x = year, y = production, fill = species)) +
  geom_area() +
  scale_y_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  )

ggplotly(p)

```

Peak Years of Production {data-orientation=columns data-icon="fa-chart-bar"}
=======================================================================

Column
-----------------------------------------------------------------------

### Peak Year of Production For Each Species

```{r Fish Productiom Peak Year by species}

# Peak production year for each of the 15 species with the largest all-time
# production. Point size encodes all-time production; species are ordered on
# the y axis by their peak year.
p <- fishing_countries %>%
  # Yearly production per species. `.groups = "drop"` silences summarise()'s
  # grouping message, which knitr would otherwise print into the panel.
  group_by(species, year) %>%
  summarise(production = sum(values), .groups = "drop") %>%
  # One row per species: the year of its maximum yearly production (first
  # such year if tied) and its total production over all years.
  group_by(species) %>%
  summarise(
    peak_year = year[which.max(production)],
    total_production = sum(production),
    .groups = "drop"
  ) %>%
  mutate(species = fct_reorder(species, peak_year)) %>%
  slice_max(total_production, n = 15) %>%
  ggplot(aes(peak_year, species, size = total_production, color = species)) +
  geom_point() +
  scale_size_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  ) +
  theme(legend.position = "none") +
  labs(
    x = "Year of peak production",
    y = "",
    size = "All-time production"
  )

ggplotly(p, tooltip = c("x", "y", "size"))

```
Column
-----------------------------------------------------------------------

### Peak Year of Production For Each Lake

```{r Fish Productiom Peak Year by lake}

# Peak production year for each lake. Point size encodes all-time production;
# lakes are ordered on the y axis by their peak year.
p <- fishing_countries %>%
  # Yearly production per lake. `.groups = "drop"` silences summarise()'s
  # grouping message, which knitr would otherwise print into the panel.
  group_by(lake, year) %>%
  summarise(production = sum(values), .groups = "drop") %>%
  # One row per lake: the year of its maximum yearly production (first such
  # year if tied) and its total production over all years.
  group_by(lake) %>%
  summarise(
    peak_year = year[which.max(production)],
    total_production = sum(production),
    .groups = "drop"
  ) %>%
  # Reorder `lake` itself. The original assigned the reordered factor to a
  # stray `species` column, so the plotted `lake` axis was never ordered.
  mutate(lake = fct_reorder(lake, peak_year)) %>%
  slice_max(total_production, n = 15) %>%
  ggplot(aes(peak_year, lake, size = total_production, color = lake)) +
  geom_point() +
  scale_size_continuous(
    labels = scales::unit_format(unit = "M", scale = 1e-6, prefix = "£")
  ) +
  theme(legend.position = "none") +
  labs(
    x = "Year of peak production",
    y = "",
    size = "All-time production"
  )

ggplotly(p, tooltip = c("x", "y", "size"))

```